{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "34b936f8-c900-40d6-957e-263ec075cf06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pinned package versions; uncomment the lines below to install them in a fresh environment.\n",
    "# !pip install toml==0.10.2\n",
    "# !pip install einops==0.8.0\n",
    "# !pip install imagesize==1.4.1\n",
    "# !pip install voluptuous==0.15.2\n",
    "# !pip install xformers==0.0.27.post2\n",
    "# !pip install wandb==0.17.8\n",
    "# !pip install transformers==4.44.2\n",
    "# !pip install diffusers==0.30.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a407933e-3a46-4102-9d5d-aff58cde18c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "from time import sleep\n",
    "from tqdm import tqdm\n",
    "from PIL import Image\n",
    "import seaborn\n",
    "import plotly.express as px\n",
    "\n",
    "import yaml\n",
    "from glob import glob\n",
    "\n",
    "from joblib import Parallel, delayed\n",
    "\n",
    "import os\n",
    "\n",
    "# Root directory of the project; the cache and model paths are derived from it.\n",
    "project_path = \"/home/ilya/isic\" #TODO: replace\n",
    "\n",
    "# Both Hugging Face cache variables point at the same absolute directory, so\n",
    "# the cache location does not depend on the current working directory\n",
    "# (this notebook changes directory later with %cd).\n",
    "hf_cache_dir = os.path.join(project_path, 'tmp_cache')\n",
    "\n",
    "os.environ[\"WANDB_PROJECT\"] = \"sd-finetune\"\n",
    "os.environ[\"TRANSFORMERS_CACHE\"] = hf_cache_dir\n",
    "os.environ[\"HF_HOME\"] = hf_cache_dir\n",
    "# Restrict CUDA to the first GPU.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9f5700a8-42c1-4b1b-a612-4cb5d0378f48",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed and fold identifiers; they are combined into the dataset and\n",
    "# fine-tune output paths further down.\n",
    "seed_name, fold_number = 33, '1'\n",
    "base_name_tmp = 'syntetic_custom_base_{}'.format(seed_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "269017fe-1a19-4fcd-bfa1-11e71f0d2ded",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image folder for the selected base name and fold; created when missing.\n",
    "artifacts_root = \"../data/artifacts\"\n",
    "dataset_path = f\"{artifacts_root}/{base_name_tmp}/{fold_number}\"\n",
    "os.makedirs(name=dataset_path, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35a40946-50df-4ea7-9a68-c302a7c05381",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give every PNG in the dataset folder a sibling .caption file that holds the\n",
    "# fixed training prompt.\n",
    "image_dir = f\"../data/artifacts/{base_name_tmp}/{fold_number}\"\n",
    "caption_text = 'a photo of a malignant mole'\n",
    "\n",
    "# Without recursive=True a '**' pattern matches exactly like '*', so the plain\n",
    "# single-star form is used.\n",
    "all_images = glob(f\"{image_dir}/*.png\")\n",
    "\n",
    "for fn in all_images:\n",
    "    # splitext swaps only the file extension; str.replace would also rewrite\n",
    "    # a '.png' that occurs earlier in the path.\n",
    "    caption_fn = os.path.splitext(fn)[0] + '.caption'\n",
    "    with open(caption_fn, 'w') as f:\n",
    "        f.write(caption_text)\n",
    "\n",
    "# Caption files now present in the folder.\n",
    "all_caps = glob(f\"{image_dir}/*.caption\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa475bc8-e032-4d85-9e50-bf1cb9d82cd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "device = 'cuda:0'\n",
    "# Make the dataset path absolute before a later cell changes the working\n",
    "# directory with %cd; the relative form would then resolve elsewhere.\n",
    "dataset_path = os.path.abspath(dataset_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0de1c0d7-a14d-4cc8-8f9d-9c237607d965",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Relative directory change: run this cell once per kernel session, because\n",
    "# re-running it resolves '..' from the already-changed working directory.\n",
    "%cd ../kohya_ss/sd-scripts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "480f62f9-1dbc-4892-bf53-f8e317da8caf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder for the metadata JSON files of this base name and fold; the path is\n",
    "# relative to the current working directory.\n",
    "meta_root = '../sd_datasets/small_test'\n",
    "meta_Folder_name = f'{meta_root}/{base_name_tmp}/{fold_number}'\n",
    "os.makedirs(name=meta_Folder_name, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bdab4c0-b54f-4d7e-a8bd-ed23a5631ff7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the caption-merging script from the current (sd-scripts) directory on\n",
    "# the dataset folder; the second argument is the metadata JSON that is read\n",
    "# back in the following cell.\n",
    "!python3 finetune/merge_captions_to_metadata.py {dataset_path} \\\n",
    "    {meta_Folder_name}/meta_cap_v1.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a70fb555-8ead-45e7-96d6-15fd9b921c94",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Read back the caption metadata file. The result is stored under meta_lat,\n",
    "# a name that is reassigned later when meta_lat.json is loaded.\n",
    "meta_cap_file = f\"{meta_Folder_name}/meta_cap_v1.json\"\n",
    "with open(meta_cap_file, \"r\") as f:\n",
    "    meta_lat = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d2260e2-9b55-4ea3-9856-750e12d3aa65",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Positional arguments (per the sd-scripts fine-tuning guide): image folder,\n",
    "# input metadata, output metadata, base model.\n",
    "# The model path is derived from project_path so that it names the same\n",
    "# directory as base_model_path, which is what training loads.\n",
    "!python3 finetune/prepare_buckets_latents.py \\\n",
    "    {dataset_path} \\\n",
    "    {meta_Folder_name}/meta_cap_v1.json \\\n",
    "    {meta_Folder_name}/meta_lat.json \\\n",
    "    {project_path}/ThisIsReal \\\n",
    "    --batch_size 16 --max_resolution 128,128 --mixed_precision no --max_bucket_reso 128 \\\n",
    "    --min_bucket_reso 128 --bucket_reso_steps 64 --bucket_no_upscale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4c3db46-d7b8-41b0-b741-28c050cbfc94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompts written to the sample-prompt file that is later passed to the\n",
    "# trainer via --sample_prompts.\n",
    "prompts_to_sample = [\n",
    "    \"a photo of a malignant mole\",\n",
    "]\n",
    "\n",
    "# Option suffix appended to every prompt. Per the sd-scripts prompt-file\n",
    "# syntax: --n negative prompt, --w/--h size, --d seed, --l guidance scale,\n",
    "# --s sampling steps.\n",
    "negative_prompt = (\n",
    "    \"lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry\"\n",
    ")\n",
    "sampling_options = \"--w 128 --h 128 --d 555 --l 7 --s 35\"\n",
    "\n",
    "with open(\"prompts_to_check.txt\", \"w\") as f:\n",
    "    f.writelines(\n",
    "        f\"{pr} --n {negative_prompt} {sampling_options} \\n\"\n",
    "        for pr in prompts_to_sample\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc9f6d6d-e7aa-496e-80a3-7dee846fec30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample-prompt file, addressed through project_path.\n",
    "prompts_to_check = os.path.join(project_path, \"kohya_ss/sd-scripts/prompts_to_check.txt\")\n",
    "# The second component must stay relative: os.path.join drops everything\n",
    "# before an absolute component, which would silently ignore project_path.\n",
    "finetunes_path = os.path.join(project_path, \"finetunes/mole\")\n",
    "\n",
    "# One output folder per base-name/fold combination.\n",
    "finetune_name = f\"{base_name_tmp}___{fold_number}\"\n",
    "finetune_full_path = os.path.join(finetunes_path, finetune_name)\n",
    "os.makedirs(finetune_full_path, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf806da0-bd1a-460b-82bc-23a411e39776",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Read back the metadata file named as the output of the\n",
    "# prepare_buckets_latents.py step.\n",
    "meta_lat_file = f\"{meta_Folder_name}/meta_lat.json\"\n",
    "with open(meta_lat_file, \"r\") as f:\n",
    "    meta_lat = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e7849ee-bf7c-4a1c-8c67-db36ab86e7c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory handed to the trainer through --logging_dir; relative to the\n",
    "# current working directory and created when missing.\n",
    "finetune_logs_outputs = \"logs\"\n",
    "os.makedirs(name=finetune_logs_outputs, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3565782-b6ba-4c4f-8cc6-dccd55bd9afa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pretrained model passed to --pretrained_model_name_or_path.\n",
    "base_model_path = os.path.join(project_path, \"ThisIsReal\")\n",
    "# Prefer the WANDB_API_KEY environment variable so the secret never has to be\n",
    "# saved inside the notebook; the literal is only a placeholder fallback.\n",
    "wandb_api_key = os.environ.get(\"WANDB_API_KEY\", '...') ###your api key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "239c552a-b4e5-4004-aff5-7c7ff598254b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch fine_tune.py through accelerate. Every option appears exactly once,\n",
    "# so the effective value is unambiguous (text-encoder learning rate: 5e-6).\n",
    "!accelerate launch --num_cpu_threads_per_process 4 fine_tune.py \\\n",
    "     --pretrained_model_name_or_path={base_model_path} \\\n",
    "     --in_json={meta_Folder_name}/meta_lat.json \\\n",
    "     --train_data_dir={dataset_path} \\\n",
    "     --output_dir={finetune_full_path} \\\n",
    "     --train_batch_size=8 \\\n",
    "     --dataset_repeats=1 \\\n",
    "     --resolution=128,128 \\\n",
    "     --enable_bucket \\\n",
    "     --keep_tokens=77 \\\n",
    "     --min_bucket_reso=128 \\\n",
    "     --max_bucket_reso=128 \\\n",
    "     --sample_every_n_steps=80 \\\n",
    "     --sample_sampler=k_dpm_2_a \\\n",
    "     --sample_prompts={prompts_to_check} \\\n",
    "     --learning_rate=3e-5 \\\n",
    "     --max_grad_norm=1 \\\n",
    "     --learning_rate_te=5e-6 \\\n",
    "     --train_text_encoder \\\n",
    "     --max_train_epochs=50 \\\n",
    "     --lr_warmup_steps=200 \\\n",
    "     --save_every_n_epochs=5 \\\n",
    "     --noise_offset=0.05 \\\n",
    "     --save_model_as=diffusers \\\n",
    "     --mixed_precision=fp16 \\\n",
    "     --save_precision=fp16 \\\n",
    "     --wandb_run_name={finetune_name} \\\n",
    "     --optimizer_type=AdamW \\\n",
    "     --gradient_checkpointing \\\n",
    "     --min_snr_gamma=5 \\\n",
    "     --lr_scheduler=constant_with_warmup \\\n",
    "     --logging_dir={finetune_logs_outputs} \\\n",
    "     --gradient_accumulation_steps=1 \\\n",
    "     --xformers \\\n",
    "     --log_with=all \\\n",
    "     --seed=222 \\\n",
    "     --wandb_api_key={wandb_api_key}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "de871d0b-3448-46e2-b8b2-7f2b3e9a499d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder handed to the Kaggle CLI in the cells below: the directory that\n",
    "# holds the fine-tune output folders.\n",
    "kaggle_dataset_path = finetunes_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "31be3e34-3713-42d1-8c42-3b9444274d7f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/jovyan/.kaggle/kaggle.json'\n",
      "Data package template written to: /home/jovyan/work/finetunes/mole/dataset-metadata.json\n"
     ]
    }
   ],
   "source": [
    "# Write a dataset-metadata.json template into the upload folder; its title\n",
    "# and id are filled in by the following cell.\n",
    "!kaggle datasets init -p {kaggle_dataset_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6e18f990-01af-4056-bd90-594d637345b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Fill in the title and id fields of the dataset-metadata.json template.\n",
    "metadata_file = os.path.join(kaggle_dataset_path, \"dataset-metadata.json\")\n",
    "\n",
    "with open(metadata_file, \"r\") as f:\n",
    "    f_r = json.load(f)\n",
    "\n",
    "f_r.update({\n",
    "    'title': \"ISIC-2024-synthetic-models\",\n",
    "    'id': \"ilya9711nov/ISIC-2024-synthetic-models\",\n",
    "})\n",
    "\n",
    "with open(metadata_file, \"w\") as f:\n",
    "    json.dump(f_r, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "503c53d5-e431-407d-93a4-32f57a50819e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/jovyan/.kaggle/kaggle.json'\n",
      "Starting upload for file syntetic_custom_base_1997___1.zip\n",
      " 47%|█████████████████▋                    | 16.3G/34.9G [05:45<04:50, 69.0MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting upload for file syntetic_custom_base_1997___3.zip\n",
      " 69%|██████████████████████████            | 24.0G/34.9G [05:19<02:00, 97.4MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting upload for file syntetic_custom_base_42___1.zip\n",
      " 65%|████████████████████████▊             | 22.8G/34.9G [05:28<03:56, 55.0MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting upload for file syntetic_custom_base_42___4.zip\n",
      " 60%|██████████████████████▋               | 20.8G/34.9G [05:26<04:41, 53.8MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting upload for file syntetic_custom_base_42___2.zip\n",
      " 82%|████████████████████████████████       | 28.7G/34.9G [05:19<01:00, 111MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 78%|█████████████████████████████▌        | 27.2G/34.9G [05:35<01:37, 84.7MB/s]Starting upload for file syntetic_custom_base_42___3.zip\n",
      " 29%|███████████                           | 10.2G/34.9G [06:02<21:40, 20.4MB/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Create the Kaggle dataset from the folder; with --dir-mode zip each\n",
    "# sub-directory is uploaded as a zip archive (see the upload log below).\n",
    "!kaggle datasets create -p {kaggle_dataset_path} --dir-mode zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fc5f00e-7db6-4f42-938e-9c68d6fe972e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
